import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import visual_behavior_glm
import visual_behavior_glm.GLM_params as glm_params
import visual_behavior_glm.GLM_analysis_tools as gat
import visual_behavior.data_access.loading as loading
import visual_behavior.data_access.utilities as utilities
import visual_behavior.visualization.utils as utils
import seaborn as sns
# Configure seaborn/matplotlib styling used by every figure below.
sns.set_context('notebook', font_scale=1.5, rc={'lines.markeredgewidth': 2})
sns.set_style('white', {'axes.spines.right': False, 'axes.spines.top': False, 'xtick.bottom': True, 'ytick.left': True, })
sns.set_palette('deep')
# IPython magics (this file is a notebook export): inline plots + module auto-reload
%matplotlib inline
%load_ext autoreload
%autoreload 2
# Load platform-paper experiment and cell tables from the project data access layer.
experiments_table = loading.get_platform_paper_experiment_table()
cells_table = loading.get_cell_table()
# Pivoted GLM results summary: one row per cell/session, one column per dropout.
# No variance-explained cutoff applied here.
model_output_type = 'adj_fraction_change_from_full'
glm_version = '15_events_L2_optimize_by_session'
rspm = gat.build_pivoted_results_summary(value_to_use=model_output_type, results_summary=None,
                                         glm_version=glm_version, cutoff=None, add_extra_columns=True)
original_rspm = rspm.copy()
# Same summary with a 0.01 variance-explained cutoff, for comparison.
model_output_type = 'adj_fraction_change_from_full'
glm_version = '15_events_L2_optimize_by_session'
filtered_rspm = gat.build_pivoted_results_summary(value_to_use=model_output_type, results_summary=None,
                                                  glm_version=glm_version, cutoff=0.01, add_extra_columns=True)
original_filtered_rspm = filtered_rspm.copy()
# bare expressions below are notebook display cells
len(rspm)
len(filtered_rspm)
# keep active sessions only
rspm = rspm[rspm.passive==False]
len(rspm)
# Ai94 data is not in here, thats good
rspm.full_genotype.unique()
rspm.head()
def get_default_features(single=False):
    """Return the list of GLM dropout feature names used for plotting.

    Parameters
    ----------
    single : bool, optional
        When True, prefix each name with 'single-' to select the
        single-feature (rather than grouped) dropout scores.

    Returns
    -------
    list of str
    """
    # face_motion_energy (QC issues), passive_change, and beh_model are
    # intentionally excluded from this analysis
    base_features = (
        'all-images',
        'omissions',
        'licks',
        'pupil',
        'running',
        'hits',
        'misses',
        'false_alarms',
        'correct_rejects',
    )
    prefix = 'single-' if single else ''
    return [prefix + name for name in base_features]
# Choose which dropout columns to cluster on. The grouped "level up"
# features (images/omissions/behavioral/task) override the default list.
features_to_plot = get_default_features(single=False)
level_up_features = [
    'all-images',
    'omissions',
    'behavioral',
    'task',
]
features_to_plot = level_up_features  # overrides the default list above
df = rspm.copy()
# reverse sort so Excitatory precedes Sst/Vip in plots
cell_types = np.sort(df.cell_type.unique())[::-1]
experience_levels = np.sort(df.experience_level.unique())
# Limit to multiscope, most recent active sessions
# Filter experiments: active sessions whose container has all three
# experience levels (multiscope / last-familiar filters currently disabled).
expts = experiments_table.copy()
expts = expts[expts.passive==False]
# expts = expts[expts.project_code=='VisualBehaviorMultiscope']
# expts = utilities.limit_to_last_familiar_second_novel_active(expts)
expts = utilities.limit_to_containers_with_all_experience_levels(expts)
# Check distribution of exposure number for this set of experiments
# summary stats of image-set exposure counts per (cell type, experience level)
expts.groupby(['cell_type', 'experience_level', 'prior_exposures_to_image_set']).mean().reset_index().groupby(['cell_type', 'experience_level']).describe()[['prior_exposures_to_image_set']]
# Get cells for those experiments
cells = cells_table.copy()
# limit cells table to experiments in most recent active sessions
cells = cells[cells.ophys_experiment_id.isin(expts.index.values)]
utilities.count_mice_expts_containers_cells(cells)
# Limit to matched cells and count
# now identify cells matched in all 3 experience levels
cells = utilities.limit_to_cell_specimen_ids_matched_in_all_experience_levels(cells)
# count
utilities.count_mice_expts_containers_cells(cells)
len(rspm.cell_specimen_id.unique())
# restrict the GLM results to the filtered experiments, then to matched cells
rspm = rspm[rspm.ophys_experiment_id.isin(expts.index.values)]
len(rspm.cell_specimen_id.unique())
rspm = rspm[rspm.cell_specimen_id.isin(cells.cell_specimen_id.values)]
len(rspm.cell_specimen_id.unique())
utilities.count_mice_expts_containers_cells(rspm)
# Build the clustering feature matrix: one row per cell,
# columns are (feature, experience_level) dropout scores.
df = rspm.copy()
# group by cell_specimen_id and experience level and take the mean (shouldnt actually do anything here because we already filtered for one session of each type)
df = df.groupby(['cell_specimen_id', 'experience_level']).mean()
# restrict to the regressors we care about
df = df[features_to_plot]
# unstack to turn session number (the last index) into a column level
df = df.unstack()
# drop NaN just to be sure, but there shouldnt be any since we already filtered to get matched cells
df = df.dropna()
# now there is only one row per cell, with dropouts for all regressors, all sessions
df.head()
# get cell IDS
cell_specimen_ids = df.index.unique()
len(cell_specimen_ids)
# Heatmap of the full cell x (feature, experience level) dropout matrix.
fig, ax = plt.subplots(figsize=(5,8))
ax = sns.heatmap(df.values, cmap='RdBu_r', ax=ax, vmin=-1, vmax=1,
                 robust=True, cbar_kws={"drawedges": False, "shrink": 0.7, "label": model_output_type})
ax.set_ylabel('cells')
ax.set_xlabel('features')
ax.set_title('dropout scores for matched cells')
ax.set_ylim(0, df.shape[0])
ax.set_xlim(0, df.shape[1])
ax.set_xticks(np.arange(0, df.shape[1]));
# set_xticklabels was previously called twice with identical arguments;
# once is sufficient
ax.set_xticklabels(df.keys(), rotation=90);
# Plot a small dropout heatmap for a random subset of 100 matched cells.
# Use np.random.permutation to get a shuffled COPY: np.random.shuffle
# operates in place, and shuffling df.index.values directly would scramble
# the array backing the DataFrame's own index.
cell_specimen_ids = np.random.permutation(df.index.values)
for cell_specimen_id in cell_specimen_ids[:100]:
    cdf = df.loc[cell_specimen_id].copy()
    cdf = pd.DataFrame(cdf)
    # short cell-type label (first word, e.g. 'Excitatory') for the title
    cell_type = rspm[rspm.cell_specimen_id==cell_specimen_id].cell_type.values[0]
    cell_type = cell_type.split(' ')[0]
    fig, ax = plt.subplots(figsize=(8,0.5))
    ax = sns.heatmap(cdf.T.abs(), cmap='Blues', ax=ax, vmin=0, vmax=1,
                     robust=True, cbar_kws={"drawedges": False, "shrink": 1, "label": 'fraction change\n explained variance'})
    ax.figure.axes[-1].yaxis.label.set_size(15)
    ax.set_xlabel('')
    ax.set_title('cell_specimen_id: '+str(cell_specimen_id)+' '+cell_type)
    ax.set_ylim(0, cdf.shape[1])
    ax.set_xlim(0, cdf.shape[0])
    ax.set_xticks(np.arange(0.5, cdf.shape[0]+0.5));
    # tick labels as 'experience_level - feature'
    ax.set_xticklabels([key[1]+' - '+key[0] for key in list(cdf.index)], rotation=90);
    ax.set_yticklabels('')
    # was `fig.tight_layout` — attribute access without the call, so the
    # layout adjustment never ran
    fig.tight_layout()
# Evaluate candidate cluster counts via silhouette scores and elbow plots
# for KMeans and SpectralClustering.
from visual_behavior.dimensionality_reduction import clustering as vba_clust
from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering
X = df.copy()
# silhouette_scores = vba_clust.get_silhouette_scores(X, model=KMeans,
# n_clusters=np.arange(2, 30),
# metric='euclidean',
# n_boots=20)
fig, ax = vba_clust.plot_silhouette_scores(X, model=KMeans, n_clusters=np.arange(2, 30), metric='euclidean', n_boots=20,
                                           model_output_type='KMeans euclidean')
fig, ax = plt.subplots(2, 1, figsize=(6, 10))
vba_clust.get_elbow_plots(X, n_clusters=np.arange(2, 30), ax=ax)
fig.tight_layout()
fig, ax = vba_clust.plot_silhouette_scores(X, model=SpectralClustering, n_clusters=np.arange(2, 30), metric='euclidean', n_boots=20,
                                           model_output_type='Spectral clustering silhouette plot')
# earlier exploration, kept for reference:
# %time
# spec_sil_scores_40 = vba_clust.get_silhouette_scores(X, model=SpectralClustering,
# n_clusters=np.arange(2, 25),
# metric='euclidean',
# n_boots=40)
# plt.plot(np.arange(2, 25),spec_sil_scores, 'k*-')
# plt.xlabel('N cluster')
# plt.ylabel('Silhouette score')
# plt.grid()
# Fit KMeans with 13 clusters, then relabel clusters so IDs follow a
# canonical order defined by each cluster's mean Novel-1 dropout profile.
from sklearn.cluster import KMeans
X = df.copy()
kmeans = KMeans(n_clusters=13, random_state=0).fit(X)
kmeans.cluster_centers_.shape
labels = kmeans.labels_
sort_order = np.argsort(labels)
sorted_labels = labels[sort_order]
cluster_df = df.copy()
cluster_df['cluster_id'] = labels
# sc = SpectralClustering(n_clusters=13, assign_labels='discretize').fit(X)
# labels = sc.labels_
# mean dropout profile per cluster, used to define the new cluster order
mean_df = cluster_df.groupby(['cluster_id']).mean()
sorted_mean_df = mean_df.sort_values(by=[('all-images', 'Novel 1'),('omissions', 'Novel 1'), ('behavioral', 'Novel 1'), ('task', 'Novel 1')])
cluster_order = sorted_mean_df.index.values
# map each original label to its position in the new ordering
new_order = np.argsort(cluster_order)
new_labels = np.asarray([new_order[label] for label in labels])
new_sort_order = np.argsort(new_labels)
new_sorted_labels = new_labels[new_sort_order]
# plot dropout scores with rows sorted by the relabeled k-means clusters
fig, ax = plt.subplots(figsize=(5,8))
# 'agglo_cluster_id' is only present if the (commented-out) agglomerative
# labeling cell was run; errors='ignore' avoids a KeyError when the column
# was never added to df
df = df.drop(columns=['agglo_cluster_id'], errors='ignore')
ax = sns.heatmap(df.abs().values[new_sort_order], cmap='Blues', ax=ax, vmin=0, vmax=1,
                 robust=True, cbar_kws={"drawedges": False, "shrink": 0.8, "label": 'fraction change in explained variance'})
ax.set_ylabel('cells')
ax.set_xlabel('features')
ax.set_title('k-means on dropout scores')
ax.set_ylim(0, df.shape[0])
ax.set_xlim(0, df.shape[1])
ax.set_xticks(np.arange(0.5, df.shape[1]+0.5));
# tick labels as 'experience_level - feature'
ax.set_xticklabels([key[1]+' - '+key[0] for key in list(df.keys())], rotation=90);
ax.set_xlabel('')
# secondary axis marking the total cell count
ax2 = ax.twinx()
ax2.set_yticks([0, len(df)])
ax2.set_yticklabels([0, len(df)])
# horizontal lines at cluster boundaries, y ticks labeled with cluster IDs
cluster_divisions = np.where(np.diff(new_sorted_labels)==1)[0]
for y in cluster_divisions:
    ax.hlines(y, xmin=0, xmax=df.shape[1], color='k')
ax.set_yticks(np.hstack([np.asarray(0), cluster_divisions])+5);
ax.set_yticklabels(np.sort(cluster_df.cluster_id.unique())[::-1]);
ax.set_ylabel('cluster ID')
# dashed vertical separators every 3 columns (one group of experience levels
# per feature). Iterate over the COLUMN count: the original looped over
# df.shape[0] (rows/cells), drawing thousands of lines clipped outside xlim.
for x in np.arange(0, df.shape[1],3):
    ax.vlines(x, ymin=0, ymax=df.shape[0], color='gray', linestyle='--', linewidth=0.5)
# plt.gca().invert_yaxis()
# Same cluster-sorted heatmap, transposed: features on y, cells on x.
fig, ax = plt.subplots(figsize=(10,4))
ax = sns.heatmap(df.abs().values[new_sort_order].T, cmap='Blues', ax=ax, vmin=0, vmax=1,
                 robust=True, cbar_kws={"drawedges": False, "shrink": 0.7, "label": 'fraction change in \nexplained variance'})
# ax.set_title('clustering on single cell changes in coding across sessions')
ax.set_xlim(0, df.shape[0])
ax2 = ax.twiny()
ax2.set_ylabel('')
ax2.set_xlabel('cells')
ax2.set_ylim(0, df.shape[1])
ax2.set_yticks(np.arange(0.5, df.shape[1]+0.5));
# ax.set_yticklabels(df.keys(), rotation=0)
# y tick labels as 'experience_level - feature'
ax.set_yticklabels([key[1]+' - '+key[0] for key in list(df.keys())], rotation=0);
ax2.set_xticks([0, len(df)])
ax2.set_xticklabels([0, len(df)], fontsize=14)
# ax2.set_xticks(np.arange(0, len(df), 100))
# ax2.set_xticklabels(np.arange(0, len(df), 100), fontsize=12)
# vertical lines at cluster boundaries, x ticks labeled with cluster IDs
cluster_divisions = np.where(np.diff(new_sorted_labels)==1)[0]
for x in cluster_divisions:
    ax.vlines(x, ymin=0, ymax=df.shape[1], color='k')
ax.set_xticks(np.hstack([np.asarray(0), cluster_divisions])+5);
ax.set_xticklabels(np.sort(cluster_df.cluster_id.unique()), rotation=0);
ax.set_xlabel('cluster ID')
plt.gca().invert_yaxis()
X = df.copy()
%time
labels = vba_clust.get_labels_for_coclust_matrix(X = X,
model = KMeans,
nboot = np.arange(100), n_clusters = 13)
# make coClustering matrix (also exists in vba_clust but needs debugging)
coClust_matrix = []
nboot = np.arange(100)
for i in range(np.shape(labels)[1]): # get cluster id of this observation
this_coClust_matrix = []
for j in nboot: # find other observations with this cluster id
clust_id = labels[j][i]
this_coClust_matrix.append(labels[j] == clust_id)
coClust_matrix.append(np.sum(this_coClust_matrix, axis=0) / max(nboot))
coClust_matrix = np.array(coClust_matrix)
# save coClust matrix
co_clustering_df = pd.DataFrame(data=coClust_matrix, columns=df.index, index=df.index)
fig, ax = plt.subplots(1,1, figsize=(15,15))
sns.heatmap(co_clustering_df, ax=ax, cmap = "Blues")
df
df
# attach container-level metadata (cre line, targeted structure, depth)
# to each cell by cell_specimen_id
ct = cells_table.drop_duplicates(subset=['cell_specimen_id', 'ophys_container_id'])
ct = ct.set_index('cell_specimen_id')
df_meta = df.join(ct, on='cell_specimen_id', how='left')
df_meta
# Build per-row (cre line) and per-column (area) color annotations, then
# draw seaborn's clustered heatmap of the co-clustering matrix and reuse
# its dendrogram row order to sort the dropout matrix.
row_colors = []
col_colors = []
# hard-coded RGB tuples match the project's cre-line palette
colors = {'Sst-IRES-Cre': (0.6196078431372549, 0.8549019607843137, 0.8980392156862745),
          'Vip-IRES-Cre': (0.7725490196078432, 0.6901960784313725, 0.8352941176470589),
          'Slc17a7-IRES2-Cre': (1.0, 0.596078431372549, 0.5882352941176471),
          'VISp': sns.color_palette()[8],
          'VISl': sns.color_palette()[2]
          }
for cre in df_meta['cre_line'].values:
    row_colors.append(colors[cre])
for tar_str in df_meta['targeted_structure'].values:
    col_colors.append(colors[tar_str])
clust_map = sns.clustermap(coClust_matrix,cmap = 'Blues', row_colors=row_colors, col_colors=col_colors)
# get row order of the dendrogram
order = clust_map.dendrogram_row.reordered_ind
fig, ax = plt.subplots(1,1, figsize=(10, 20))
sns.heatmap(df.iloc[order].abs(), cmap='Blues')
plt.tight_layout()
#fig.savefig(os.path.join(fig_path, 'heatmap_of_coclust_order.png'))
# Agglomerative clustering on the co-clustering matrix; relabel clusters
# by descending size and plot the matrix sorted by the new cluster IDs.
from sklearn.cluster import AgglomerativeClustering
cluster = AgglomerativeClustering(n_clusters=13, affinity='euclidean', linkage='average')
agglo_clust = cluster.fit_predict(co_clustering_df.values)
cluster_labels = agglo_clust
# df_meta.loc[:,'agglo_cluster_id'] = list(agglo_clust)
df_meta.loc[:,'cluster_id'] = list(cluster_labels)
cluster_counts = df_meta.groupby(['cluster_id']).count()
# count cells per cluster via one (arbitrary, complete) column
n_cells = df_meta.groupby(['cluster_id']).count()[[('all-images', 'Familiar')]].rename(columns={('all-images', 'Familiar'):'n_cells_in_cluster'})
cluster_size_order = n_cells.sort_values(by='n_cells_in_cluster').index.values[::-1]
cluster_size_order
# new_cluster_id: 0 = largest cluster
df_meta['new_cluster_id'] = [np.where(cluster_size_order==cluster_id)[0][0] for cluster_id in df_meta.cluster_id.values]
order = np.argsort(df_meta['new_cluster_id'].values)
cell_order = co_clustering_df.index.values[order][::-1]
co_clustering_df_sorted = co_clustering_df.reindex(columns=cell_order)
co_clustering_df_sorted = co_clustering_df_sorted.reindex(index=cell_order)
fig, ax = plt.subplots(1,1, figsize=(7,6))
ax = sns.heatmap(co_clustering_df_sorted, ax=ax, cmap = "Blues", square=True,
                 cbar_kws={'label':'probability of co-clustering', 'shrink':0.8})
ax.set_xticks(np.arange(0, co_clustering_df_sorted.shape[0], 400))
ax.set_xticklabels(np.arange(0, co_clustering_df_sorted.shape[0], 400))
ax.set_xlabel('cells')
ax.set_yticks(np.arange(0, co_clustering_df_sorted.shape[0], 400))
ax.set_yticklabels(np.arange(0, co_clustering_df_sorted.shape[0], 400))
ax.set_ylabel('cells')
ax.set_title('co-clustering matrix')
fig.tight_layout()
#
plt.gca().invert_xaxis()
plt.gca().invert_yaxis()
# Sort the dropout matrix by the size-ordered cluster IDs and plot it.
order = np.argsort(df_meta['new_cluster_id'].values)
cell_order = df.index.values[order]
cluster_id_order = df_meta['new_cluster_id'].values[order]
df_sorted = df.reindex(index=cell_order)
# df_sorted = df_sorted.drop(columns=['agglo_cluster_id'])
fig, ax = plt.subplots(figsize=(5,8))
ax = sns.heatmap(df_sorted.abs().values, cmap='Blues', ax=ax, vmin=0, vmax=1,
                 robust=True, cbar_kws={"drawedges": False, "shrink": 0.7, "label": 'fraction change in explained variance'})
ax.set_ylabel('cells')
ax.set_xlabel('features')
ax.set_title('hierarchical clustering\non co-clustering matrix')
ax.set_ylim(0, df_sorted.shape[0])
ax.set_xlim(0, df_sorted.shape[1])
ax.set_xticks(np.arange(0.5, df_sorted.shape[1]+0.5));
# ax.set_xticklabels(df.keys(), rotation=90)
ax.set_xticklabels([key[1]+' - '+key[0] for key in list(df_sorted.keys())], rotation=90);
# ax.set_xticklabels([key[0]+' - '+key[1] for key in list(df.keys())], rotation=90);
ax2 = ax.twinx()
ax2.set_yticks([0, len(df_sorted)])
ax2.set_yticklabels([0, len(df_sorted)])
ax2.set_xlabel('')
# horizontal lines at cluster boundaries, y ticks labeled with cluster IDs
cluster_divisions = np.where(np.diff(df_meta.new_cluster_id.values[order])==1)[0]
for y in cluster_divisions:
    ax.hlines(y, xmin=0, xmax=df_sorted.shape[1], color='k')
ax.set_yticks(np.hstack([np.asarray(0), cluster_divisions])+5);
ax.set_yticklabels(np.sort(df_meta.new_cluster_id.unique())[::-1]);
ax.set_ylabel('cluster ID')
# dashed vertical separators every 3 columns
# NOTE(review): iterates over df_sorted.shape[0] (rows); shape[1] (columns)
# looks intended — lines beyond xlim are clipped, so output is unchanged
for x in np.arange(3, df_sorted.shape[0],3):
    ax.vlines(x, ymin=0, ymax=df_sorted.shape[0], color='gray', linestyle='--', linewidth=0.5)
# plt.gca().invert_yaxis()
# Attach cluster IDs and cell metadata to the sorted dropout matrix, then
# compute the fraction of each cell type's cells landing in each cluster.
cluster_df = df_sorted.copy()
cluster_df['cluster_id'] = cluster_id_order
cluster_df = cluster_df.join(df_meta[['cell_type', 'targeted_structure', 'binned_depth']], on='cell_specimen_id')
# cluster_df = cluster_df.set_index('cell_specimen_id')
cluster_df['cluster_id'] = cluster_id_order
# cluster_df = pd.DataFrame(index=df_sorted.index, columns=['cluster_id'], data=new_labels)
print(len(cluster_df))
# merge with metadata
metadata = rspm[['cell_specimen_id', 'cell_type', 'imaging_depth', 'targeted_structure', 'binned_depth']]
cluster_df = cluster_df.merge(metadata, on='cell_specimen_id')
print(len(cluster_df))
cluster_df = cluster_df.drop_duplicates(subset='cell_specimen_id')
print(len(cluster_df))
cluster_df.head()
sns.countplot(data=cluster_df, x='cluster_id', hue='cell_type')
len(cluster_df)
cluster_df = cluster_df.dropna()
len(cluster_df)
# per-cell-type totals and per-cluster counts -> fraction per cluster
total_per_cell_type = cluster_df.groupby(['cell_type']).count()[['cell_specimen_id']].rename(columns={'cell_specimen_id':'n_total'})
total_per_cell_type
n_per_cluster = cluster_df.groupby(['cell_type', 'cluster_id']).count()[['cell_specimen_id']].rename(columns={'cell_specimen_id':'n_per_cluster'})
n_per_cluster = n_per_cluster.reset_index()
fraction_per_cluster = n_per_cluster.merge(total_per_cell_type, on=['cell_type'])
fraction_per_cluster['fraction'] = fraction_per_cluster.n_per_cluster.values/fraction_per_cluster.n_total.values
fraction_per_cluster
# Bar plots: fraction of cells per cluster split by cell type (vertical
# then horizontal), then per (cell type, targeted structure).
import visual_behavior.visualization.utils as utils
palette = utils.get_cre_line_colors()
cell_types = np.sort(cluster_df.cell_type.unique())
fig, ax = plt.subplots(figsize=(8,4))
ax = sns.barplot(data=fraction_per_cluster, x='cluster_id', y='fraction', hue='cell_type', palette=palette, ax=ax)
# ax.set_title('fraction of cells per cluster')
ax.set_ylabel('fraction of cells')
ax.legend(title='', fontsize='small')
fig.tight_layout()
# horizontal version of the same plot
palette = utils.get_cre_line_colors()
cell_types = np.sort(cluster_df.cell_type.unique())
fig, ax = plt.subplots(figsize=(4,8))
ax = sns.barplot(data=fraction_per_cluster, y='cluster_id', x='fraction', hue='cell_type',
                 palette=palette, orient='h', ax=ax)
# ax.set_title('fraction of cells per cluster')
# ax.set_ylabel('fraction of cells per cell type')
ax.legend(title='', fontsize='x-small', loc='upper right')
fig.tight_layout()
# fractions per cluster within each (cell type, area)
total_per_cell_type_and_area = cluster_df.groupby(['cell_type', 'targeted_structure']).count()[['cell_specimen_id']].rename(columns={'cell_specimen_id':'n_total'})
total_per_cell_type_and_area
n_per_cluster_area = cluster_df.groupby(['cell_type', 'targeted_structure', 'cluster_id']).count()[['cell_specimen_id']].rename(columns={'cell_specimen_id':'n_per_cluster'})
n_per_cluster_area = n_per_cluster_area.reset_index()
fraction_per_cluster_area = n_per_cluster_area.merge(total_per_cell_type_and_area, on=['cell_type', 'targeted_structure'])
fraction_per_cluster_area['fraction'] = fraction_per_cluster_area.n_per_cluster.values/fraction_per_cluster_area.n_total.values
fraction_per_cluster_area
# one panel per cell type, comparing VISp vs VISl
palette = [sns.color_palette()[6], sns.color_palette()[2]]
areas = ['VISp', 'VISl']
cell_types = np.sort(cluster_df.cell_type.unique())
fig, ax = plt.subplots(1,3, figsize=(15,3), sharey=True)
for i, cell_type in enumerate(cell_types):
    ax[i] = sns.barplot(data=fraction_per_cluster_area[fraction_per_cluster_area.cell_type==cell_type],
                        x='cluster_id', y='fraction', hue='targeted_structure', palette=palette, ax=ax[i])
    ax[i].set_title(cell_type)
    ax[i].get_legend().remove()
    ax[i].set_ylabel('')
ax[0].set_ylabel('fraction of cells')
ax[0].legend(loc='upper left', title='')
fig.tight_layout()
# Fraction of cells per cluster within each (cell type, binned depth).
depths = [75, 175, 275, 375]
cluster_ids = np.sort(cluster_df.cluster_id.unique())
cell_types = np.sort(cluster_df.cell_type.unique())
total_per_cell_type_and_depth = cluster_df.groupby(['cell_type', 'binned_depth']).count()[['cell_specimen_id']].rename(columns={'cell_specimen_id':'n_total'})
total_per_cell_type_and_depth
n_per_cluster_depth = cluster_df.groupby(['cell_type', 'binned_depth', 'cluster_id']).count()[['cell_specimen_id']].rename(columns={'cell_specimen_id':'n_per_cluster'})
n_per_cluster_depth = n_per_cluster_depth.reset_index()
# make sure every (cell_type, depth, cluster) combination is represented,
# even if there are no cells in it. The original loop reused a stale
# `cell_type` variable left over from an earlier plotting loop, so
# zero-count rows were only ever added for a single cell type; it also
# checked depth/cluster without the cell type, and appended at ind+1,
# skipping an index label.
for cell_type in cell_types:
    for depth in depths:
        for cluster_id in cluster_ids:
            loc_df = n_per_cluster_depth[(n_per_cluster_depth.cell_type==cell_type) &
                                         (n_per_cluster_depth.binned_depth==depth) &
                                         (n_per_cluster_depth.cluster_id==cluster_id)]
            if len(loc_df) == 0:
                # append a zero-count row at the next integer index
                n_per_cluster_depth.loc[len(n_per_cluster_depth), :] = [cell_type, depth, cluster_id, 0]
fraction_per_cluster_depth= n_per_cluster_depth.merge(total_per_cell_type_and_depth, on=['cell_type', 'binned_depth'])
fraction_per_cluster_depth['fraction'] = fraction_per_cluster_depth.n_per_cluster.values/fraction_per_cluster_depth.n_total.values
# loc-assignment of mixed rows upcasts depth to float; restore int
fraction_per_cluster_depth['binned_depth'] = [int(depth) for depth in fraction_per_cluster_depth.binned_depth.values]
cluster_df
# map cluster IDs back onto the (unaveraged) GLM results table
cluster_ids = pd.DataFrame(index=df_sorted.index, columns=['cluster_id'], data=cluster_id_order)
cluster_rspm = rspm.merge(cluster_ids, on='cell_specimen_id')
cluster_ids
# cluster_rspm['cluster_id'] = [int(cluster_id) for cluster_id in cluster_rspm.cluster_id.values]
# For each cluster, look at average dropout for all features, for each session type
# earlier version of this cell, kept for reference:
# cluster_ids = np.sort(rspm.cluster_id.unique())
# n_rows = int(len(cluster_ids)/5.)
# fig, ax = plt.subplots(2, 10, figsize=(15,4), sharex=True, sharey=True)
# ax = ax.ravel()
# for i,cluster_id in enumerate(cluster_ids):
# mean_dropouts_for_cluster = rspm[rspm.cluster_id==cluster_id].groupby('experience_level').mean()[features_to_plot]
# ax[i] = sns.heatmap(mean_dropouts_for_cluster.T, cmap='RdBu', vmin=-1, vmax=1, ax=ax[i], cbar=False,)#cbar_kws={'shrink':0.7, 'label':model_output_type})
# ax[i].set_title('cluster_id: '+str(cluster_id))
# ax[i].set_ylim(-0.5, 4.5)
# ax[i].set_xlabel('')
# # ax[i].set_
# fig.tight_layout()
# One small heatmap per cluster: mean dropout per feature x experience level.
cluster_ids = np.sort(cluster_rspm.cluster_id.unique())
fig, ax = plt.subplots(3, 5, figsize=(8,6), sharex=True, sharey=True)
ax = ax.ravel()
for i,cluster_id in enumerate(cluster_ids):
    mean_dropouts_for_cluster = cluster_rspm[cluster_rspm.cluster_id==cluster_id].groupby('experience_level').mean()[features_to_plot]
    ax[i] = sns.heatmap(mean_dropouts_for_cluster.T, cmap='RdBu', vmin=-1, vmax=1, ax=ax[i], cbar=False,)#cbar_kws={'shrink':0.7, 'label':model_output_type})
    ax[i].set_title('cluster '+str(int(cluster_id)), fontsize=14)
    ax[i].set_ylim(-0.5, 4.5)
    ax[i].set_xlabel('')
    # ax[i].set_
fig.tight_layout()
# Two-row figure per cluster: top = mean dropout heatmap, bottom = bar plot
# of the fraction of each cell type in the cluster.
cluster_ids = np.sort(cluster_rspm.cluster_id.unique())
palette = utils.get_cre_line_colors()
fig, ax = plt.subplots(2, 13, figsize=(22,6), sharex='row', sharey='row')
ax = ax.ravel()
for i,cluster_id in enumerate(cluster_ids):
    mean_dropouts_for_cluster = cluster_rspm[cluster_rspm.cluster_id==cluster_id].groupby('experience_level').mean()[features_to_plot]
    ax[i] = sns.heatmap(mean_dropouts_for_cluster.T, cmap='RdBu', vmin=-1, vmax=1, ax=ax[i], cbar=False,)#cbar_kws={'shrink':0.7, 'label':model_output_type})
    ax[i].set_title('cluster '+str(int(cluster_id)), fontsize=14)
    ax[i].set_ylim(-0.5, 4.5)
    ax[i].set_xlabel('')
    # title includes % of all cells and the cell count for this cluster
    fraction = np.round(fraction_per_cluster[fraction_per_cluster.cluster_id==cluster_id].fraction.values[0]*100,1)
    n_cells = int(len(cluster_rspm[cluster_rspm.cluster_id==cluster_id].cell_specimen_id.unique()))
    ax[i].set_title('cluster '+str(int(cluster_id))+'\n'+str(fraction)+'%, n='+str(n_cells), fontsize=16)
    data = fraction_per_cluster[fraction_per_cluster.cluster_id==cluster_id]
    # second row of axes (offset by len(cluster_ids) into the raveled array)
    ax[i+len(cluster_ids)] = sns.barplot(data=data,
                                         x='cell_type', y='fraction', #hue='cell_type',
                                         order=cell_types, #hue_order=cell_types,
                                         palette=palette, ax=ax[i+len(cluster_ids)])
    ax[i+len(cluster_ids)].set_ylabel('fraction of cells')
    ax[i+len(cluster_ids)].set_xlabel('')
    # ax[i+len(cluster_ids)].get_legend().remove()
    ax[i+len(cluster_ids)].set_xticklabels(cell_types, rotation=90)
    if i != 0:
        ax[i+len(cluster_ids)].set_ylabel('')
    # ax[i].set_
fig.tight_layout()
# near-identical repeat of the cell above (only figsize differs: 23 vs 22)
cluster_ids = np.sort(cluster_rspm.cluster_id.unique())
palette = utils.get_cre_line_colors()
fig, ax = plt.subplots(2, 13, figsize=(23,6), sharex='row', sharey='row')
ax = ax.ravel()
for i,cluster_id in enumerate(cluster_ids):
    mean_dropouts_for_cluster = cluster_rspm[cluster_rspm.cluster_id==cluster_id].groupby('experience_level').mean()[features_to_plot]
    ax[i] = sns.heatmap(mean_dropouts_for_cluster.T, cmap='RdBu', vmin=-1, vmax=1, ax=ax[i], cbar=False,)#cbar_kws={'shrink':0.7, 'label':model_output_type})
    ax[i].set_title('cluster '+str(int(cluster_id)), fontsize=14)
    ax[i].set_ylim(-0.5, 4.5)
    ax[i].set_xlabel('')
    fraction = np.round(fraction_per_cluster[fraction_per_cluster.cluster_id==cluster_id].fraction.values[0]*100,1)
    n_cells = int(len(cluster_rspm[cluster_rspm.cluster_id==cluster_id].cell_specimen_id.unique()))
    ax[i].set_title('cluster '+str(int(cluster_id))+'\n'+str(fraction)+'%, n='+str(n_cells), fontsize=16)
    data = fraction_per_cluster[fraction_per_cluster.cluster_id==cluster_id]
    ax[i+len(cluster_ids)] = sns.barplot(data=data,
                                         x='cell_type', y='fraction', #hue='cell_type',
                                         order=cell_types, #hue_order=cell_types,
                                         palette=palette, ax=ax[i+len(cluster_ids)])
    ax[i+len(cluster_ids)].set_ylabel('fraction of cells')
    ax[i+len(cluster_ids)].set_xlabel('')
    # ax[i+len(cluster_ids)].get_legend().remove()
    ax[i+len(cluster_ids)].set_xticklabels(cell_types, rotation=90)
    if i != 0:
        ax[i+len(cluster_ids)].set_ylabel('')
    # ax[i].set_
fig.tight_layout()
# Load omission-triggered response dataframes from the platform S3 cache.
import visual_behavior.visualization.ophys.platform_paper_figures as ppf
from allensdk.brain_observatory.behavior.behavior_project_cache import VisualBehaviorOphysProjectCache
cache_dir = loading.get_platform_analysis_cache_dir()
cache = VisualBehaviorOphysProjectCache.from_s3_cache(cache_dir=cache_dir)
%%time
# (cache is re-created in the timed cell; same object as above)
cache_dir = loading.get_platform_analysis_cache_dir()
cache = VisualBehaviorOphysProjectCache.from_s3_cache(cache_dir)
# set various params
df_name = 'omission_response_df'
conditions = ['cell_specimen_id']
use_events = True
filter_events = True
# load multi_session_df
multi_session_df = loading.get_multi_session_df(cache_dir, df_name, conditions, experiments_table,
                                                use_events=use_events, filter_events=filter_events)
print(len(multi_session_df.ophys_experiment_id.unique()))
original_multi_session_df = multi_session_df.copy()
# limit to platform paper dataset
multi_session_df = multi_session_df[multi_session_df.ophys_experiment_id.isin(experiments_table.index.values)]
print(len(multi_session_df.ophys_experiment_id.unique()))
# merge with metadata
multi_session_df = multi_session_df.merge(experiments_table, on='ophys_experiment_id')
print(len(multi_session_df.ophys_experiment_id.unique()))
multi_session_df = multi_session_df.reset_index(drop=True)
# Merge cluster IDs into the response dataframe and set plotting params.
cluster_mdf = multi_session_df.merge(cluster_df[['cell_specimen_id', 'cluster_id']], on='cell_specimen_id')
# filter
project_code = 'VisualBehaviorMultiscope'
# get timestamps
# use any one multiscope experiment to obtain the shared trace timestamps
multiscope_expt = experiments_table[experiments_table.project_code==project_code].index.values[0]
timestamps = ppf.get_timestamps_for_response_df_type(cache, multiscope_expt, df_name)
print(len(timestamps))
axes_column = 'cluster_id'
hue_column = 'experience_level'
hue_conditions = np.sort(cluster_mdf[hue_column].unique())
palette = utilities.get_experience_level_colors()
xlim_seconds = [-1,1.5]
change = False
omitted = True
xlabel = 'time (sec)'
sdf = cluster_mdf[cluster_mdf.project_code==project_code].copy()
# remove traces with incorrect length - why does this happen?
# keep only rows whose mean_trace has the expected 107 samples
sdf = sdf.reset_index(drop=True)
indices = [index for index in sdf.index if len(sdf.iloc[index].mean_trace) == 107]
sdf = sdf.loc[indices]
print(len(sdf.mean_trace.values[0]))
# Final summary figure: one column per cluster with 5 stacked panels —
# (1) mean dropout heatmap, (2) fraction per cell type, (3) fraction per
# depth, (4) fraction per area, (5) mean omission-triggered traces per
# experience level. Panels are addressed in a raveled axis array; panel
# row r for cluster i is ax[i + len(cluster_ids)*r].
depths = [75, 175, 275, 375]
areas = ['VISp', 'VISl']
# cluster_ids = np.sort(cluster_rspm.cluster_id.unique())
# cluster_ids = cluster_order.copy()
# cluster_ids = fraction_per_cluster.cluster_id.values[np.argsort(fraction_per_cluster.fraction.values)]
cluster_ids = np.sort(cluster_rspm.cluster_id.unique())
# vip_cluster_order = [9, 0, 1, 2, 3, 8, 4, 6, 5, 7]
# cluster_ids = vip_cluster_order
palette = utils.get_experience_level_colors()
n_clusters = len(cluster_ids)
fig, ax = plt.subplots(5, n_clusters, figsize=(n_clusters*2, 14), sharex='row', sharey='row', gridspec_kw={'height_ratios': [3, 2, 2, 1, 2]})
ax = ax.ravel()
for i,cluster_id in enumerate(cluster_ids):
    # panel 1: mean dropout per feature x experience level
    mean_dropouts_for_cluster = cluster_rspm[cluster_rspm.cluster_id==cluster_id].groupby('experience_level').mean()[features_to_plot]
    ax[i] = sns.heatmap(mean_dropouts_for_cluster.T, cmap='RdBu', vmin=-1, vmax=1, ax=ax[i], cbar=False,)#cbar_kws={'shrink':0.7, 'label':model_output_type})
    fraction = np.round(fraction_per_cluster[fraction_per_cluster.cluster_id==cluster_id].fraction.values[0]*100,1)
    n_cells = int(len(cluster_rspm[cluster_rspm.cluster_id==cluster_id].cell_specimen_id.unique()))
    ax[i].set_title('cluster '+str(int(cluster_id))+'\n'+str(fraction)+'%, n='+str(n_cells), fontsize=16)
    ax[i].set_yticklabels(mean_dropouts_for_cluster.keys(), rotation=0)
    ax[i].set_ylim(-0.5, 4.5)
    ax[i].set_xlabel('')
    # panel 2: fraction of cells per cell type
    palette = utils.get_cre_line_colors()
    data = fraction_per_cluster[fraction_per_cluster.cluster_id==cluster_id]
    ax[i+len(cluster_ids)] = sns.barplot(data=data, orient='h',
                                         y='cell_type', x='fraction', #hue='cell_type',
                                         # order=depths, #hue_order=cell_types,
                                         palette=palette, ax=ax[i+len(cluster_ids)])
    ax[i+len(cluster_ids)].set_ylabel('')
    ax[i+len(cluster_ids)].set_xlabel('fraction cells\nper class')
    ax[i+len(cluster_ids)].set_yticks(np.arange(0, len(cell_types)))
    ax[i+len(cluster_ids)].set_yticklabels(cell_types, rotation=0)
    # panel 3: fraction of cells per imaging depth bin
    data = fraction_per_cluster_depth[fraction_per_cluster_depth.cluster_id==cluster_id]
    ax[i+(len(cluster_ids)*2)] = sns.barplot(data=data, orient='h',
                                             y='binned_depth', x='fraction', #hue='cell_type',
                                             # order=depths, #hue_order=cell_types,
                                             palette='gray', ax=ax[i+(len(cluster_ids)*2)])
    ax[i+(len(cluster_ids)*2)].set_ylabel('depth (um)')
    ax[i+(len(cluster_ids)*2)].set_xlabel('fraction cells\nper depth')
    ax[i+(len(cluster_ids)*2)].set_yticks(np.arange(0, len(depths)))
    ax[i+(len(cluster_ids)*2)].set_yticklabels(depths, rotation=0)
    # panel 4: fraction of cells per targeted structure
    data = fraction_per_cluster_area[fraction_per_cluster_area.cluster_id==cluster_id]
    ax[i+(len(cluster_ids)*3)] = sns.barplot(data=data, orient='h',
                                             y='targeted_structure', x='fraction', #hue='cell_type',
                                             # order=depths, #hue_order=cell_types,
                                             palette='gray', ax=ax[i+(len(cluster_ids)*3)])
    ax[i+(len(cluster_ids)*3)].set_ylabel('area')
    ax[i+(len(cluster_ids)*3)].set_xlabel('fraction cells\nper area')
    ax[i+(len(cluster_ids)*3)].set_yticklabels(areas, rotation=0)
    # plot mean traces
    # panel 5: one mean trace per experience level, colored by level
    palette = utils.get_experience_level_colors()
    for c, hue in enumerate(hue_conditions):
        traces = sdf[(sdf['cluster_id'] == cluster_id) & (sdf[hue_column] == hue)].mean_trace.values
        ax[i+(len(cluster_ids)*4)] = utils.plot_mean_trace(np.asarray(traces), timestamps, ylabel='response',
                                                           legend_label=hue, color=palette[c], interval_sec=1, plot_sem=False,
                                                           xlim_seconds=xlim_seconds, ax=ax[i+(len(cluster_ids)*4)])
    ax[i+(len(cluster_ids)*4)] = utils.plot_flashes_on_trace(ax[i+(len(cluster_ids)*4)], timestamps, change=change, omitted=omitted)
    ax[i+(len(cluster_ids)*4)].axvline(x=0, ymin=0, ymax=1, linestyle='--', color='gray')
    ax[i+(len(cluster_ids)*4)].set_title('')
    ax[i+(len(cluster_ids)*4)].set_xlim(xlim_seconds)
    ax[i+(len(cluster_ids)*4)].set_xlabel(xlabel)
    # only the first column keeps its y-axis labels
    if i != 0:
        ax[i+len(cluster_ids)].set_ylabel('')
        ax[i+(len(cluster_ids)*2)].set_ylabel('')
        ax[i+(len(cluster_ids)*3)].set_ylabel('')
        ax[i+(len(cluster_ids)*4)].set_ylabel('')
    # ax[i].set_
fig.tight_layout()
# plt.suptitle(cell_type, x=0.53, y=1.03, fontsize=20)